# Load dataset
# Load necessary libraries
library(readr)
Warning: package ‘readr’ was built under R version 4.4.3
library(dplyr) # %>%
Warning: package ‘dplyr’ was built under R version 4.4.3
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
# Read the CSV file from Google Drive
url <- "https://drive.google.com/uc?export=download&id=12CriTyCML_9rGAySnmcitxXbij7N11T0"
my_data <- read_csv(url)
Rows: 920 Columns: 16── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): sex, dataset, cp, restecg, slope, thal
dbl (8): id, age, trestbps, chol, thalch, oldpeak, ca, num
lgl (2): fbs, exang
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View the first few rows
head(my_data)
NA
NA
NA
#Removing Unnecessary Columns
my_data <- my_data %>%
select(-id, -dataset)
print(my_data)
NA
#Checking for Missing Data
my_data[!complete.cases(my_data),]
#Handling Missing Values
my_data[is.na(my_data$trestbps),]
# Calculate the median
trestbps_medv <- median(my_data$trestbps, na.rm = TRUE)
print(trestbps_medv)
[1] 130
# Replace NA values with the median
my_data$trestbps[is.na(my_data$trestbps)] <- trestbps_medv
# Print updated data
print(my_data)
my_data[is.na(my_data$trestbps),]
NA
# Handling Missing Values
my_data[is.na(my_data$chol),]
chol_meanv<- mean(my_data$chol, na.rm=TRUE)
print(chol_meanv)
[1] 199.1303
my_data$chol[is.na(my_data$chol)] <- chol_meanv
print(my_data)
my_data[is.na(my_data$chol),]
# Handling Missing Values
my_data[is.na(my_data$thalch),]
thalch_meanv<- mean(my_data$thalch, na.rm=TRUE)
print(thalch_meanv)
[1] 137.5457
my_data$thalch[is.na(my_data$thalch)] <- thalch_meanv
print(my_data)
my_data[is.na(my_data$thalch),]
# Handling Missing Values
my_data[is.na(my_data$ca),]
# Find the most frequent value (mode) for 'ca'
mode_ca <- as.numeric(names(sort(table(my_data$ca), decreasing = TRUE))[1])
print(mode_ca)
[1] 0
my_data$ca[is.na(my_data$ca)] <- mode_ca
print(my_data)
my_data[is.na(my_data$ca),]
NA
# Handling Missing Values
my_data[is.na(my_data$thal),]
# Find the most frequent value (mode) for 'thal'
mode_thal <- as.character(names(sort(table(my_data$thal), decreasing = TRUE))[1])
print(mode_thal)
[1] "normal"
my_data$thal[is.na(my_data$thal)] <- mode_thal
print(my_data)
my_data[is.na(my_data$thal),]
NA
# Handling Missing Values
my_data[is.na(my_data$slope),]
# Find the most frequent value (mode) for 'slope'
mode_slope <- names(sort(table(my_data$slope), decreasing = TRUE))[1]
print(mode_slope)
[1] "flat"
my_data$slope[is.na(my_data$slope)] <- mode_slope
print(my_data)
my_data[is.na(my_data$slope),]
NA
#Checking for Remaining Missing Data
my_data[!complete.cases(my_data),]
#Remove records with missing values
my_data<-my_data[complete.cases(my_data),]
print(my_data)
my_data[!complete.cases(my_data),]
# Summary of numeric variables
summary(my_data)
age sex cp trestbps chol fbs
Min. :28.00 Length:770 Length:770 Min. : 0.0 Min. : 0.0 Mode :logical
1st Qu.:46.00 Class :character Class :character 1st Qu.:120.0 1st Qu.:198.0 FALSE:656
Median :54.00 Mode :character Mode :character Median :130.0 Median :228.5 TRUE :114
Mean :53.04 Mean :132.8 Mean :218.8
3rd Qu.:59.75 3rd Qu.:140.0 3rd Qu.:269.0
Max. :77.00 Max. :200.0 Max. :603.0
restecg thalch exang oldpeak slope ca
Length:770 Min. : 60.0 Mode :logical Min. :-1.0000 Length:770 Min. :0.000
Class :character 1st Qu.:120.0 FALSE:466 1st Qu.: 0.0000 Class :character 1st Qu.:0.000
Mode :character Median :140.0 TRUE :304 Median : 0.5000 Mode :character Median :0.000
Mean :138.6 Mean : 0.8874 Mean :0.261
3rd Qu.:159.0 3rd Qu.: 1.5000 3rd Qu.:0.000
Max. :202.0 Max. : 6.2000 Max. :3.000
thal num
Length:770 Min. :0.0000
Class :character 1st Qu.:0.0000
Mode :character Median :1.0000
Mean :0.9195
3rd Qu.:1.0000
Max. :4.0000
hist(my_data$age,
main="Histogram of Age",
col="lightblue",
xlab="Age",
border="black")
hist(my_data$trestbps,
main="Histogram of trestbps ",
col="green",
xlab="trestbps",
border="black")
hist(my_data$chol,
main="Histogram of chol",
col="yellow",
xlab="chol",
border="black")
hist(my_data$thalch,
main="Histogram of thalch ",
col="blue",
xlab="thalch",
border="black")
hist(my_data$oldpeak,
main="Histogram of oldpeak",
col="lightblue",
xlab="oldpeak",
border="black")
hist(my_data$ca,
main="Histogram of Chest pain",
col="lightblue",
xlab="Ca",
border="black")
hist(my_data$num,
main="Histogram of num",
col="lightblue",
xlab="num",
border="black")
# Convert 'restecg' to numeric
my_data$restecg <- as.numeric(factor(my_data$restecg, levels = c("normal", "lv hypertrophy", "st-t abnormality")))
# Convert 'fbs' (fasting blood sugar) to numeric
my_data$fbs <- ifelse(my_data$fbs == TRUE, 1, 0)
# Convert 'slope' to numeric
my_data$slope <- as.numeric(factor(my_data$slope, levels = c("upsloping", "flat", "downsloping")))
my_data$num <- as.factor(my_data$num)
# Update the levels of 'num' to represent meaningful categories
levels(my_data$num) <- c("No Heart Disease", "Heart Disease -Type1", "Heart Disease -Type2", "Heart Disease -Type3","Heart Disease -Type4")
str(my_data)
tibble [770 × 14] (S3: tbl_df/tbl/data.frame)
$ age : num [1:770] 63 67 67 37 41 56 62 57 63 53 ...
$ sex : chr [1:770] "Male" "Male" "Male" "Male" ...
$ cp : chr [1:770] "typical angina" "asymptomatic" "asymptomatic" "non-anginal" ...
$ trestbps: num [1:770] 145 160 120 130 130 120 140 120 130 140 ...
$ chol : num [1:770] 233 286 229 250 204 236 268 354 254 203 ...
$ fbs : num [1:770] 1 0 0 0 0 0 0 0 0 1 ...
$ restecg : num [1:770] 2 2 2 1 2 1 2 1 2 2 ...
$ thalch : num [1:770] 150 108 129 187 172 178 160 163 147 155 ...
$ exang : logi [1:770] FALSE TRUE TRUE FALSE FALSE FALSE ...
$ oldpeak : num [1:770] 2.3 1.5 2.6 3.5 1.4 0.8 3.6 0.6 1.4 3.1 ...
$ slope : num [1:770] 3 2 2 3 1 1 3 1 2 3 ...
$ ca : num [1:770] 0 3 2 0 0 0 2 0 1 0 ...
$ thal : chr [1:770] "fixed defect" "normal" "reversable defect" "normal" ...
$ num : Factor w/ 5 levels "No Heart Disease",..: 1 3 2 1 1 1 4 1 3 2 ...
# Print the first few rows to verify the new labels
head(my_data)
NA
hist(my_data$restecg,
main="Histogram of restecg",
col="lightblue",
xlab="restecg",
border="black")
hist(my_data$fbs,
main="Histogram of fbs",
col="lightblue",
xlab="fbs",
border="black")
hist(my_data$slope,
main="Histogram of slope",
col="lightblue",
xlab="slope",
border="black")
# Density Plot (Smooth Distribution)
plot(density(my_data$age),
main="Density Plot of Age",
col="blue",
lwd=2)
# Density Plot (Smooth Distribution)
plot(density(my_data$trestbps),
main="Density Plot of trestbps",
col="blue",
lwd=2)
# Density Plot (Smooth Distribution)
plot(density(my_data$chol),
main="Density Plot of chol ",
col="blue",
lwd=2)
# Density Plot (Smooth Distribution)
plot(density(my_data$thalch),
main="Density Plot of thalch",
col="blue",
lwd=2)
# Density Plot (Smooth Distribution)
plot(density(my_data$oldpeak),
main="Density Plot of oldpeak",
col="blue",
lwd=2)
# Density Plot (Smooth Distribution)
plot(density(my_data$slope),
main="Density Plot of slope",
col="blue",
lwd=2)
# Density Plot (Smooth Distribution)
plot(density(my_data$restecg),
main="Density Plot of restecg",
col="blue",
lwd=2)
# Boxplot
boxplot(my_data$age,
main="Box Plot of Age",
col="orange")
# Boxplot
boxplot(my_data$trestbps,
main="Box Plot of trestbps ",
col="orange")
# Boxplot
boxplot(my_data$chol,
main="Box Plot of chol",
col="orange")
# Boxplot
boxplot(my_data$restecg,
main="Box Plot of restecg ",
col="orange")
# Boxplot
boxplot(my_data$thalch,
main="Box Plot of thalch",
col="orange")
# Boxplot
boxplot(my_data$oldpeak,
main="Box Plot of oldpeak",
col="orange")
# Count of each num of affected people
table(my_data$num)
No Heart Disease Heart Disease -Type1 Heart Disease -Type2 Heart Disease -Type3 Heart Disease -Type4
375 210 81 80 24
# Bar plot for count
barplot(table(my_data$num),
main="Count of num",
col=c("red", "green", "blue"))
# Bar plot for count
barplot(table(my_data$sex),
main="Count of sex",
col=c("red", "green", "blue"))
# Bar plot for count
barplot(table(my_data$cp),
main="Count of cp",
col=c("red", "green", "blue"))
# Bar plot for count
barplot(table(my_data$trestbps),
main="Count of trestbps",
col=c("red", "green", "blue"))
# Bar plot for count
barplot(table(my_data$slope),
main="Count of slope",
col=c("red", "green", "blue"))
# Bar plot for Species count
barplot(table(my_data$thal),
main="Count of thal",
col=c("red", "green", "blue"))
# Scatter plot: age vs trestbps
plot(my_data$age, my_data$trestbps,
main="Scatter Plot of Age vs trestbps",
xlab="age", ylab="trestbps",
col=my_data$num, pch=20)
legend("bottomright", legend=levels(my_data$num), col=1:4, pch=19)
# Scatter plot: thalch vs chol
plot(my_data$thalch, my_data$chol,
main="Scatter Plot of thalch vs chol",
xlab="thalch", ylab="chol",
col=my_data$num, pch=20)
legend("topleft", legend=levels(my_data$num), col=1:4, pch=20)
# Scatter plot: thalch vs trestbps
plot(my_data$thalch, my_data$trestbps,
main="Scatter Plot of thalch vs trestbps",
xlab="thalch", ylab="trestbps",
col=my_data$num, pch=20)
legend("bottomleft", legend=levels(my_data$num), col=1:4, pch=20)
# Correlation between age and trestbs
cor(my_data$age, my_data$trestbps)
[1] 0.2409653
# Correlation between age and trestbs
cor(my_data$age, my_data$chol)
[1] -0.06950973
# Correlation between chol and trestbs
cor(my_data$chol, my_data$trestbps)
[1] 0.0599012
# Correlation between thalch and trestbs
cor(my_data$thalch, my_data$trestbps)
[1] -0.109774
# Correlation between age and thalch
cor(my_data$age, my_data$thalch)
[1] -0.3739944
# Correlation between age and slope
cor(my_data$age, my_data$slope)
[1] 0.06146084
# Select only numeric columns
numeric_cols <- sapply(my_data, is.numeric)
correlation_matrix <- cor(my_data[, numeric_cols], use = "complete.obs")
# Print the correlation matrix
print(correlation_matrix)
age trestbps chol fbs restecg thalch oldpeak slope ca
age 1.00000000 0.24096532 -0.06950973 0.22647439 0.20749775 -0.37399438 0.26091537 0.06146084 0.24956877
trestbps 0.24096532 1.00000000 0.05990120 0.15692273 0.10034657 -0.10977400 0.17080414 0.08097304 0.02804014
chol -0.06950973 0.05990120 1.00000000 0.02907186 -0.06549605 0.19940868 0.05526483 -0.05522410 0.15579945
fbs 0.22647439 0.15692273 0.02907186 1.00000000 0.12153080 -0.05949849 0.05546392 0.06742084 0.07785460
restecg 0.20749775 0.10034657 -0.06549605 0.12153080 1.00000000 -0.09568953 0.10888213 0.05004836 0.03359935
thalch -0.37399438 -0.10977400 0.19940868 -0.05949849 -0.09568953 1.00000000 -0.18268766 -0.31214858 0.03739947
oldpeak 0.26091537 0.17080414 0.05526483 0.05546392 0.10888213 -0.18268766 1.00000000 0.31268535 0.23154038
slope 0.06146084 0.08097304 -0.05522410 0.06742084 0.05004836 -0.31214858 0.31268535 1.00000000 -0.10682672
ca 0.24956877 0.02804014 0.15579945 0.07785460 0.03359935 0.03739947 0.23154038 -0.10682672 1.00000000
library(corrplot)
Warning: package ‘corrplot’ was built under R version 4.4.3corrplot 0.95 loaded
# Calculate correlation matrix
numeric_cols <- sapply(my_data, is.numeric)
correlation_matrix <- cor(my_data[, numeric_cols], use = "complete.obs")
# Create heatmap with correlation values
corrplot(correlation_matrix,
method = "color",
type = "upper",
order = "hclust",
tl.col = "black",
tl.srt = 45,
addCoef.col = "black",
number.cex = 0.7,
diag = FALSE)
library(plotly)
Warning: package ‘plotly’ was built under R version 4.4.3Loading required package: ggplot2
Warning: package ‘ggplot2’ was built under R version 4.4.3Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
plot_ly(
x = colnames(correlation_matrix),
y = colnames(correlation_matrix),
z = correlation_matrix,
type = "heatmap",
colorscale = "Viridis",
hoverinfo = "x+y+z"
) %>%
layout(
title = "Interactive Correlation Matrix",
xaxis = list(tickangle = 45),
margin = list(l = 100, r = 100, b = 150)
)
# Boxplot: Cholesterol by Heart Disease Status
boxplot(chol ~ num, data = my_data,
main = "Boxplot of Cholesterol by Heart Disease Status",
xlab = "Heart Disease",
ylab = "Cholesterol",
col = c("skyblue", "tomato"))
# Boxplot: Thalch by Sex
boxplot(thalch ~ sex, data = my_data,
main = "Boxplot of Max Heart Rate by Sex",
xlab = "Sex",
ylab = "Max Heart Rate (thalch)",
col = c("lightpink", "lightblue"))
NA
NA
# Boxplot: Oldpeak by Chest Pain Type
boxplot(oldpeak ~ cp, data = my_data,
main = "Boxplot of ST Depression (Oldpeak) by Chest Pain Type",
xlab = "Chest Pain Type",
ylab = "Oldpeak",
col = rainbow(length(unique(my_data$cp))))
# Boxplot: Trestbps by Slope
boxplot(trestbps ~ slope, data = my_data,
main = "Boxplot of Resting BP by Slope of Peak Exercise ST",
xlab = "Slope Type",
ylab = "Resting Blood Pressure",
col = c("orange", "cyan", "lightgreen"))
library(ggplot2)
ggplot(my_data, aes(x = as.factor(num), y = chol, fill = as.factor(num))) +
geom_boxplot() +
ggtitle("Cholesterol Levels by Heart Disease Class") +
xlab("Heart Disease Class (num)") +
ylab("Serum Cholesterol (mg/dl)") +
theme_minimal()
NA
NA
NA
install.packages(“ggplot2”) # Pair Plot (Multivariate Visualization)
pairs(my_data[, c("age", "trestbps", "chol", "thalch", "oldpeak")],
col = my_data$num,
pch = 19,
main = "Pair Plot of Selected Features Colored by Heart Disease")
library(ggplot2)
my_data$num <- as.factor(my_data$num)
ggplot(my_data, aes(x = age, y = thalch, color = num)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Age vs Max Heart Rate (thalch) by Heart Disease Class") +
xlab("Age") +
ylab("Max Heart Rate Achieved (thalch)")
NA
NA
ggplot(my_data, aes(x = trestbps, y = chol, color = num)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Resting Blood Pressure vs Cholesterol by Heart Disease Class") +
xlab("Resting Blood Pressure (trestbps)") +
ylab("Serum Cholesterol (chol)")
NA
NA
# Scatter plot with regression line for thalch vs chol
ggplot(my_data, aes(x = thalch, y = chol, color = num)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Maximum Heart Rate Achieved vs Cholesterol by Heart Disease Class") +
xlab("Maximum Heart Rate Achieved (thalch)") +
ylab("Serum Cholesterol (chol)") +
theme_minimal()
NA
NA
NA
NA
# Load necessary libraries
library(caret)
Warning: package ‘caret’ was built under R version 4.4.3Loading required package: lattice
# Select numeric columns for scaling
numeric_cols <- c("age", "trestbps", "chol", "thalch", "oldpeak")
# Apply standardization (z-score normalization)
preProcValues <- preProcess(my_data[, numeric_cols], method = c("center", "scale"))
my_data_scaled <- predict(preProcValues, my_data[, numeric_cols])
# Replace original columns with scaled versions
my_data[, numeric_cols] <- my_data_scaled
# Verify the transformation
summary(my_data[, numeric_cols])
age trestbps chol thalch oldpeak
Min. :-2.6527 Min. :-7.1575 Min. :-2.3638 Min. :-3.04530 Min. :-1.7406
1st Qu.:-0.7459 1st Qu.:-0.6883 1st Qu.:-0.2244 1st Qu.:-0.72072 1st Qu.:-0.8184
Median : 0.1015 Median :-0.1492 Median : 0.1052 Median : 0.05414 Median :-0.3573
Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
3rd Qu.: 0.7106 3rd Qu.: 0.3899 3rd Qu.: 0.5428 3rd Qu.: 0.79026 3rd Qu.: 0.5649
Max. : 2.5380 Max. : 3.6245 Max. : 4.1519 Max. : 2.45621 Max. : 4.8994
# Binary encoding for sex (Male=1, Female=0)
my_data$sex <- ifelse(my_data$sex == "Male", 1, 0)
# Ensure fbs is integer (fasting blood sugar > 120 mg/dl)
my_data$fbs <- as.integer(my_data$fbs)
# Convert target variable to factor
my_data$num <- as.factor(my_data$num)
# Identify numeric columns for scaling (excluding already encoded categoricals and target)
numeric_cols <- c("age", "trestbps", "chol", "thalch", "oldpeak", "ca")
# Apply standardization (z-score normalization)
library(caret)
preProcValues <- preProcess(my_data[, numeric_cols], method = c("center", "scale"))
# View the structure of your data
str(my_data)
tibble [770 × 14] (S3: tbl_df/tbl/data.frame)
$ age : num [1:770] 1.05 1.48 1.48 -1.7 -1.28 ...
$ sex : num [1:770] 1 1 1 1 0 1 0 0 1 1 ...
$ cp : chr [1:770] "typical angina" "asymptomatic" "asymptomatic" "non-anginal" ...
$ trestbps: num [1:770] 0.659 1.468 -0.688 -0.149 -0.149 ...
$ chol : num [1:770] 0.154 0.727 0.111 0.338 -0.16 ...
$ fbs : int [1:770] 1 0 0 0 0 0 0 0 0 1 ...
$ restecg : num [1:770] 2 2 2 1 2 1 2 1 2 2 ...
$ thalch : num [1:770] 0.442 -1.186 -0.372 1.875 1.294 ...
$ exang : logi [1:770] FALSE TRUE TRUE FALSE FALSE FALSE ...
$ oldpeak : num [1:770] 1.303 0.565 1.579 2.409 0.473 ...
$ slope : num [1:770] 3 2 2 3 1 1 3 1 2 3 ...
$ ca : num [1:770] 0 3 2 0 0 0 2 0 1 0 ...
$ thal : chr [1:770] "fixed defect" "normal" "reversable defect" "normal" ...
$ num : Factor w/ 5 levels "No Heart Disease",..: 1 3 2 1 1 1 4 1 3 2 ...
# Check the first few rows
head(my_data)
# Function to detect outliers using IQR method
detect_outliers <- function(x) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR
x < lower_bound | x > upper_bound
}
# Check for outliers in numeric columns
outliers_list <- lapply(my_data[, numeric_cols], detect_outliers)
outliers_count <- sapply(outliers_list, sum)
print(outliers_count)
age trestbps chol thalch oldpeak ca
0 25 99 1 15 123
# Boxplots to visualize outliers
par(mfrow = c(2, 3))
for (col in numeric_cols) {
boxplot(my_data[[col]], main = paste("Boxplot of", col))
}
par(mfrow = c(1, 1))
# 1. Robust Scaling
library(caret)
robust_scaler <- preProcess(my_data[, numeric_cols],
method = c("center", "scale", "YeoJohnson"))
my_data[, numeric_cols] <- predict(robust_scaler, my_data[, numeric_cols])
# 2. Check for zero-IQR columns
zero_iqr_cols <- sapply(my_data[, numeric_cols], function(x) IQR(x, na.rm = TRUE) == 0)
if(any(zero_iqr_cols)) {
message("Columns with IQR=0: ", paste(names(zero_iqr_cols)[zero_iqr_cols], collapse = ", "))
valid_cols <- names(zero_iqr_cols)[!zero_iqr_cols]
}
Columns with IQR=0: ca
# 3. Multivariate Outlier Detection (with fallback)
library(MASS)
Attaching package: ‘MASS’
The following object is masked from ‘package:plotly’:
select
The following object is masked from ‘package:dplyr’:
select
if(length(valid_cols) > 1) { # Need at least 2 columns for covariance
tryCatch({
mcd <- cov.mcd(my_data[, valid_cols])
mahalanobis_dist <- mahalanobis(my_data[, valid_cols],
mcd$center,
mcd$cov)
cutoff <- qchisq(0.99, df = length(valid_cols))
outliers <- mahalanobis_dist > cutoff
# 4. Handle outliers (capping approach)
my_data[outliers, valid_cols] <-
apply(my_data[outliers, valid_cols], 2,
function(x) pmin(pmax(x,
quantile(x, 0.01, na.rm = TRUE)),
quantile(x, 0.99, na.rm = TRUE)))
}, error = function(e) {
message("MCD failed, using regular Mahalanobis: ", e$message)
# Fallback to regular Mahalanobis
mahalanobis_dist <- mahalanobis(my_data[, valid_cols],
colMeans(my_data[, valid_cols]),
cov(my_data[, valid_cols]))
cutoff <- qchisq(0.99, df = length(valid_cols))
outliers <- mahalanobis_dist > cutoff
})
} else {
message("Insufficient valid columns for multivariate outlier detection")
outliers <- rep(FALSE, nrow(my_data))
}
# 5. Final Verification
summary(my_data[, numeric_cols])
age trestbps chol thalch oldpeak
Min. :-2.4825139 Min. :-2.328135 Min. :-2.045079 Min. :-2.6480150 Min. :-1.986058
1st Qu.:-0.7616226 1st Qu.:-0.673934 1st Qu.:-0.333804 1st Qu.:-0.7452664 1st Qu.:-0.956260
Median : 0.0639728 Median :-0.107625 Median : 0.001456 Median : 0.0024077 Median :-0.111733
Mean : 0.0005231 Mean : 0.008183 Mean :-0.001064 Mean :-0.0007991 Mean : 0.001899
3rd Qu.: 0.6966808 3rd Qu.: 0.426799 3rd Qu.: 0.498388 3rd Qu.: 0.7790438 3rd Qu.: 0.880230
Max. : 2.7240781 Max. : 3.268579 Max. : 5.235235 Max. : 2.6990588 Max. : 2.361611
ca
Min. :-0.390
1st Qu.:-0.390
Median :-0.390
Mean : 0.000
3rd Qu.:-0.390
Max. : 4.092
boxplot(my_data[, numeric_cols], main = "Post-Processing Distributions")